package com.spiderman.test;

import java.util.List;

import com.spiderman.entity.Movie;
import com.spiderman.utils.ConnectionUtil;
import com.spiderman.utils.RegExUtil;

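/**
 * Description: Crawler smoke test. Downloads one movie-listing page, extracts the
 * movie entries from it with a hand-written regular expression, and prints each
 * entry to standard output.
 * @author Kwum
 * @date 2017-08-05 11:06:14
 * @version 1.0
 */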

public class Demo {

    // Address of the listing page handed to ConnectionUtil.
    private static final String PAGE_URL = "http://104.250.153.205/aiqing/index2.html";

    // Second argument handed to ConnectionUtil; presumably the charset used to
    // decode the response body -- TODO confirm against ConnectionUtil.
    private static final String PAGE_CHARSET = "gbk";

    // Hand-written regular expression describing a single <li> movie entry.
    // How to build one: copy the wanted fragment out of the page's HTML, replace
    // every variable part with the reluctant group (.*?), and escape each " as \".
    // The pattern contains nine capture groups.
    // NOTE(review): there is no whitespace between tag names and attributes
    // (e.g. "<aclass="), so the HTML is presumably stripped of whitespace before
    // matching -- verify in ConnectionUtil / RegExUtil.
    private static final String MOVIE_PATTERN =
            "<li><aclass=\"play-pic\"target=\"_blank\"href=\"(.*?)\">"
            + "<imgsrc=\"(.*?)\"alt=\"(.*?)\"/><spanclass=\"play-icon\">&nbsp;</span></a><h5><atarget=\"_blank\"href=\"(.*?)\">(.*?)</a>"
            + "</h5><pclass=\"actor\"><em>主演:</em>(.*?)</p><pclass=\"type\"><em>类型：</em>(.*?)</p><pclass=\"plot\">"
            + "<em>剧情：</em>(.*?)</p><pclass=\"more-desc\"><aclass=\"more\"target=\"_blank\"href=\"(.*?)\">详细</a></p></li>";

    /**
     * Entry point: fetches the listing page, extracts the movies from it and
     * prints them one per line.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        printEach(fetchMovies());
    }

    /**
     * Requests {@link #PAGE_URL} through {@code ConnectionUtil} and passes the
     * result, together with {@link #MOVIE_PATTERN}, to {@code RegExUtil.getMovie}.
     *
     * @return the list produced by {@code RegExUtil.getMovie}
     */
    private static List<Movie> fetchMovies() {
        return RegExUtil.getMovie(
                ConnectionUtil.connectByHttpURLConnection(PAGE_URL, PAGE_CHARSET),
                MOVIE_PATTERN);
    }

    /**
     * Writes the {@code toString()} form of every movie to standard output,
     * one entry per line, in list order.
     *
     * @param movies the entries to print
     */
    private static void printEach(List<Movie> movies) {
        for (Movie movie : movies) {
            String line = movie.toString();
            System.out.println(line);
        }
    }

}
